testsuite: Avoid using should_fail
author Simon McVittie <smcv@debian.org>
Wed, 23 Nov 2022 18:36:08 +0000 (18:36 +0000)
committer Simon McVittie <smcv@debian.org>
Wed, 23 Nov 2022 18:54:02 +0000 (18:54 +0000)
There are two possible interpretations of "expected failure": either
the test *must* fail (exactly the inverse of an ordinary test, with
success becoming failure and failure becoming success), or the test
*may* fail (with success intended, but failure possible in some
environments). Autotools had the second interpretation, which seems
more useful in practice, but Meson has the first.

In GTK 3.24.35, if the environment is such that the label-sizing.ui
reftest happens to be successful, the overall result of the test suite
is failure. This seems unlikely to have been the intention.

Instead of using should_fail, put the tests in one of two new suites:
"flaky" is intended for tests that succeed or fail unpredictably
according to the test environment or chance, while "failing" is for
tests that ought to succeed but currently never do as a result of a
bug or missing functionality. With a sufficiently new version of Meson,
the flaky and failing tests are not run by default, but can be requested
with a command like:

    meson test --setup=unstable_tests --suite=flaky --suite=failing

This arrangement is inspired by GNOME/glib!2987, which was contributed
by Marco Trevisan.

Signed-off-by: Simon McVittie <smcv@debian.org>
.gitlab-ci/test-docker-meson.sh
testsuite/a11y/meson.build
testsuite/gtk/meson.build
testsuite/meson.build
testsuite/reftests/meson.build

diff --git a/.gitlab-ci/test-docker-meson.sh b/.gitlab-ci/test-docker-meson.sh
index a116839a9d49a6fe7b91fcfdfc36f8c7b85c33ea..26abe0fd86e57a5a9403feec6dca7ee99c0d758f 100755 (executable)
@@ -22,9 +22,22 @@ meson \
 cd _build
 ninja
 
+# Meson < 0.57 can't exclude suites in a test_setup() so we have to
+# explicitly leave out the failing and flaky suites.
 xvfb-run -a -s "-screen 0 1024x768x24" \
     meson test \
         --timeout-multiplier 4 \
         --print-errorlogs \
         --suite=gtk+-3.0 \
-        --no-suite=gtk+-3.0:a11y
+        --no-suite=flaky \
+        --no-suite=failing
+
+# We run the flaky and failing tests to get them reported in the CI logs,
+# but if they fail (which we expect they often will), that isn't an error.
+xvfb-run -a -s "-screen 0 1024x768x24" \
+    meson test \
+        --timeout-multiplier 4 \
+        --print-errorlogs \
+        --suite=flaky \
+        --suite=failing \
+    || true
diff --git a/testsuite/a11y/meson.build b/testsuite/a11y/meson.build
index fa8b0450305f0dcb5c9c79477b1e6e056d7556f4..85d0f5d46715951b1b495a20ade3f7e84341635a 100644 (file)
@@ -66,7 +66,7 @@ foreach t: a11y_state_tests
         'GSETTINGS_SCHEMA_DIR=@0@'.format(gtk_schema_build_dir),
         'GTK_TEST_MESON=1',
       ],
-      suite: 'a11y',
+      suite: ['a11y', 'flaky'],
     )
   endif
 endforeach
@@ -100,7 +100,7 @@ foreach t: a11y_tests
                'G_TEST_BUILDDIR=@0@'.format(meson.current_build_dir()),
                'GSETTINGS_SCHEMA_DIR=@0@'.format(gtk_schema_build_dir),
              ],
-        suite: 'a11y')
+        suite: ['a11y', 'flaky'])
 endforeach
 
 installed_test_data = [
diff --git a/testsuite/gtk/meson.build b/testsuite/gtk/meson.build
index 206af0dc2ce5b199379552278802bb1e1a3bc0b0..137f93a6269600404de0e53d746598c6e8ea0c64 100644 (file)
@@ -50,7 +50,9 @@ tests = [
   ['revealer-size'],
 ]
 
-# Tests that are expected to fail
+# Tests that are expected to fail, sometimes or always
+flaky = [
+]
 xfail = [
 ]
 
@@ -83,7 +85,15 @@ foreach t : tests
     install: get_option('installed_tests'),
     install_dir: installed_test_bindir)
 
-  expect_fail = xfail.contains(test_name)
+  suites = ['gtk']
+
+  if flaky.contains(test_name)
+    suites += 'flaky'
+  endif
+
+  if xfail.contains(test_name)
+    suites += 'failing'
+  endif
 
   test(test_name, test_exe,
        args: [ '--tap', '-k' ],
@@ -97,8 +107,7 @@ foreach t : tests
               'GSETTINGS_SCHEMA_DIR=@0@'.format(gtk_schema_build_dir),
               'GTK_TEST_MESON=1',
             ],
-       suite: 'gtk',
-       should_fail: expect_fail,
+       suite: suites,
   )
 endforeach
 
diff --git a/testsuite/meson.build b/testsuite/meson.build
index 289f2725e1ceb4a6b198ad3122bc3bdb4b96fbd6..032baf2979cc54aa9574f07989c49fb062764e8c 100644 (file)
@@ -2,6 +2,13 @@ gtk_libexecdir = join_paths(gtk_prefix, get_option('libexecdir'))
 installed_test_bindir = join_paths(gtk_libexecdir, 'installed-tests', 'gtk+')
 installed_test_datadir = join_paths(gtk_datadir, 'installed-tests', 'gtk+')
 
+if meson.version().version_compare('>=0.57.0')
+  add_test_setup('default',
+    is_default: true,
+    exclude_suites: ['flaky', 'failing'],
+  )
+endif
+
 subdir('gtk')
 subdir('gdk')
 subdir('css')
diff --git a/testsuite/reftests/meson.build b/testsuite/reftests/meson.build
index 2135ebb4342ab2e8f5e5792a6b897d2b59124265..8c3c3e821520bc58d9aa495fdaa974dee8e559f9 100644 (file)
@@ -419,7 +419,7 @@ test_data = [
 ]
 
 # Depending on the environment these fail, feel free to fix them
-somehow_broken = [
+flaky = [
   'button-wrapping.ui',
   'cellrenderer-pixbuf-stock-rtl.ui',
   'label-sizing.ui',
@@ -428,9 +428,21 @@ somehow_broken = [
   'symbolic-icon-translucent-color.ui',
   'window-height-for-width.ui',
 ]
+xfail = [
+]
 
 foreach testname : test_data
   if testname.endswith('.ui') and not testname.endswith('.ref.ui')
+    suites = ['reftest']
+
+    if flaky.contains(testname)
+      suites += 'flaky'
+    endif
+
+    if xfail.contains(testname)
+      suites += 'failing'
+    endif
+
     # reftests fail when multiple windows open at the same time stealing the focus,
     # so set is_parallel to false
     test('reftest ' + testname, gtk_reftest,
@@ -450,8 +462,7 @@ foreach testname : test_data
                 'G_TEST_BUILDDIR=@0@'.format(meson.current_build_dir()),
                 'REFTEST_MODULE_DIR=@0@'.format(meson.current_build_dir()),
               ],
-         suite: 'reftest',
-         should_fail: somehow_broken.contains(testname),
+         suite: suites,
          is_parallel: false)
   endif
 endforeach